The aim of this report is investigate the expression data distribution patters of user-defined genes (max 10) in combined expression data (UMCCR_PC.counts.matrix.txt.subset.txt) derived from different samples. The data combination includes filtering out genes with low counts , transformation (CPM method, followed by log2-transformation) and normalisation (TMM). The pipeline is based on recommendations from RNAseq123 package.

### Define functions

##### Create 'not in' operator
"%!in%" <- function(x,table) match(x,table, nomatch = 0) == 0
##### Prepare object to write into a file
prepare2write <- function (x) {
  
  x2write <- cbind(rownames(x), x)
  colnames(x2write) <- c("Gene",colnames(x))
  return(x2write)
}
##### Prepare gene data matrix to write into a file
geneMatrix2write <- function (x) {
  
  x2write <- cbind(rownames(x), x)
  colnames(x2write) <- c("Gene",colnames(x))
  return(x2write)
}

##### Assign colours to different groups
getTargetsColours <- function(targets) {
  
  ##### Predefined selection of colours for groups
  targets.colours <- c("red","blue","green","darkgoldenrod","darkred","deepskyblue", "coral", "cornflowerblue", "chartreuse4", "bisque4", "chocolate3", "cadetblue3", "darkslategrey", "lightgoldenrod4", "mediumpurple4", "orangered3","indianred1","blueviolet","darkolivegreen4","darkgoldenrod4","firebrick3","deepskyblue4", "coral3", "dodgerblue1", "chartreuse3", "bisque3", "chocolate4", "cadetblue", "darkslategray4", "lightgoldenrod3", "mediumpurple3", "orangered1")
  
  f.targets <- factor(targets)
  vec.targets <- targets.colours[1:length(levels(f.targets))]
  targets.colour <- rep(0,length(f.targets))
  for(i in 1:length(f.targets))
    targets.colour[i] <- vec.targets[ f.targets[i]==levels(f.targets)]
  
  return( list(vec.targets, targets.colour) )
}

##### Assign colours to different distributions
getGenesColours <- function(genes) {
  
  ##### Predefined selection of colours for genes
  genes.colours <- rainbow(length(genes))

  f.genes <- factor(genes)
  vec.genes <- genes.colours[1:length(levels(f.genes))]
  genes.colour <- rep(0,length(f.genes))
  for(i in 1:length(f.genes)){
    genes.colour[i] <- vec.genes[ f.genes[i]==levels(f.genes)]
  }
  
  return( genes.colour )
}

##### Assign colours to different samples
getSamplesColours <- function(samples) {
  
  ##### Predefined selection of colours for genes
  samples.colours <- brewer.pal(length(samples), "Set1")

  f.samples <- factor(samples)
  vec.samples <- samples.colours[1:length(levels(f.samples))]
  samples.colour <- rep(0,length(f.samples))
  for(i in 1:length(f.samples)){
   samples.colour[i] <- vec.samples[ f.samples[i]==levels(f.samples)] 
  }
  
  return( samples.colour )
}

##### Calculate TPM from RPKM (from http://luisvalesilva.com/datasimple/rna-seq_units.html )
tpm_from_rpkm <- function(x) {
  rpkm.sum <- colSums(x)
  return(t(t(x) / (1e-06 * rpkm.sum)))
}

##### Convert density to counts
density2freq <- function(density) {

  freq = length(density)/sum(density) * density
  return(freq)
}
  

##### Generate density plot for selected genes. Currently, this function works for one gene at a time
comparPlot <- function(genes, dataset1, dataset2, main_title, x_title, y_title, sampleName, plot_mode = "static") {
  
  ##### Used data for user-defined genes
  dataset1 <- dataset1[ genes, ,drop=FALSE]
  dataset2 <- dataset2[ genes, ,drop=FALSE]
 
  ##### Make sure that samples are in the same order
  dataset2 <- dataset2[ , colnames(dataset1) ,drop=FALSE]
  
  #### Interactive genes distribution plot
  ##### Create empty data frame
  data.df <- data.frame(matrix(ncol = 4, nrow = 0))
  colnames(data.df) <- c("gene", "sample", "dataset1", "dataset2")
  
  for (i in 1:nrow(dataset1)) {
      data.df <- rbind(data.df, data.frame( gene = genes[i], sample = names(sort(dataset1[ genes[i], ])), dataset1 = dataset1[ genes[i], ], dataset2 = dataset2[ genes[i], ]))
  }
  
  ##### Extract expression for selected sample in the distributions dataframe
  data.df.selected <- data.frame(matrix(ncol = 5, nrow = 0))
  colnames(data.df.selected) <- c("data", "gene", "sample", "expr", "dens")
  
  for ( i in 1:length(sampleName) ) {
    data.df.selected <- rbind(data.df.selected, data.df[sampleName[i] == data.df$sample, ])
  }
  
  ##### Assign colours to distributions
  genes.colour <- getGenesColours(unique(data.df$gene))
  
  ##### Assign colours to samples
  sample.colour <- getSamplesColours(unique(data.df.selected$sample))

  
  ##### Generate interactive density plot
  p <- plotly::plot_ly(data.df, x = ~dataset1, y = ~dataset2, type = 'scatter', color = ~gene, colors = genes.colour, text = data.df$sample, width = 00, height = 400) %>%
    plotly::add_markers(y = data.df.selected$dataset2, x = data.df.selected$dataset1, 
                name = data.df.selected$sample,
                text = data.df.selected$sample,
                mode = 'markers',
                marker = list(size = 8, colors = data.df.selected$sample, color = rep(sample.colour, each = nrow(data.df.selected)/length(samples)), line = list(color = "grey", width = 2)),
                showlegend = TRUE, inherit = FALSE) %>%
    
    ##### Add Loess smoothed line
    plotly::add_lines(x = data.df$dataset1, y = ~fitted(loess(data.df$dataset2 ~ data.df$dataset1)),
            line = list(color = 'grey'),
            name = "Loess Smoother", opacity = 0.5, showlegend = TRUE) %>%
    plotly::layout(title = main_title,
           xaxis = list(title = x_title),
           yaxis = list (title = y_title))
  
return( p )
}

##### Generate density plot for selected genes. Currently, this function works for one gene at a time
densityPlot <- function(genes, data, main_title, x_title, sampleName, distributions = NULL, plot_mode = "static") {
  
  ##### Used data for user-defined genes
  data <- data[ genes, ,drop=FALSE]
  
  ##### Transformed data for html report
  #### Interactive genes distribution plot
  ##### Create empty data frame
  data.df <- data.frame(matrix(ncol = 4, nrow = 0))
  colnames(data.df) <- c("gene", "sample", "expr", "dens")
  
  for (i in 1:nrow(data)) {
      data.df <- rbind(data.df, data.frame(gene = genes[i], sample = names(sort(data[ genes[i], ])), expr = sort(data[ genes[i], ]), dens = density2freq(density(data[i,], n=ncol(data))$y)))
  }
  
  ##### Generate values to generate various distributions
  if ( !is.null(distributions) && length(genes) == 1) {
    
    ##### Use the density values obtained from the expression values
    expr.sorted <- sort(data[ genes, ])
    
    ##### Get min and max values based on the expression data
    data.x <- seq(min(expr.sorted), max(expr.sorted), length.out = ncol(data))
    
    ##### Create empty data frame
    data.df.dist <- data.frame(matrix(ncol = 4, nrow = 0))
    colnames(data.df.dist) <- c("gene", "sample", "expr", "dens")
    
    ##### Generate y-values to mirror distributions of interest
    ##### Generate y-values for normal distribution
    if ( "normal" %in% tolower(distributions)) {
      
      ##### Generate y-values for normal distribution
      data.y <- dnorm(data.x, mean = mean(data.x), sd = (max(data.x)-mean(data.x))/5)
      data.df.dist <- rbind(data.df.dist, data.frame(gene="Normal distribution", sample = names(sort(data[ genes, ])), expr=data.x, dens=density2freq(data.y)))
    } 
    
    if ( "binomial" %in% tolower(distributions) ) {
      
      ##### Generate x- and y-values for binomial distribution
      #data.x <- round(seq(min(data), max(data), length.out = ncol(data)), digits = 0)
      data.x <- 1:ncol(data)
      data.y <- dbinom(data.x, ncol(data), 0.25)
      
      data.x <- rescale(data.x, to = c(min(expr.sorted), max(expr.sorted)))
      data.df.dist <- rbind(data.df.dist, data.frame(gene="Binomial distribution (p=0.25)", sample = names(sort(data[ genes, ])), expr=data.x, dens=density2freq(data.y)))
      
      data.x <- 1:ncol(data)
      data.y <- dbinom(data.x, ncol(data), 0.75)
      
      data.x <- rescale(data.x, to = c(min(expr.sorted), max(expr.sorted)))
      data.df.dist <- rbind(data.df.dist, data.frame(gene="Binomial distribution (p=0.75)", sample = names(sort(data[ genes, ])), expr=data.x, dens=density2freq(data.y)))
    }
    
    if ("bimodal" %in% tolower(distributions)){
      
      ##### Draw n/2 samples from a normal distributions with one median and another n/2 samples from a second normal distribution with a different median
      data.x1 <- seq(min(expr.sorted), median(expr.sorted), length.out = ncol(data)/2)
      data.x2 <- seq(median(expr.sorted), max(expr.sorted), length.out = ncol(data)/2)
      
      ##### Combine both normal distributions to generate a bimodal distribution
      data.x <- c(data.x1, data.x2)
      
      ##### Generate y-values for bimodal distribution
      data.y <- c(dnorm(data.x1, mean = mean(data.x1), sd = (max(data.x1)-mean(data.x1))/3), dnorm(data.x2, mean = mean(data.x2), sd = (max(data.x2)-mean(data.x2))/3))
      
      ##### Add bimodal dist values to the distribution dataframe
      data.df.dist <- rbind(data.df.dist, data.frame(gene="Bimodal distribution", sample = names(sort(data[ genes, ])), expr=data.x, dens=density2freq(data.y)))
    }
    
    data.df <- rbind(data.df, data.df.dist)
    
    ##### Extract expression for selected sample in the distributions dataframe
    data.df.selected <- data.frame(matrix(ncol = 4, nrow = 0))
    colnames(data.df.selected) <- c("gene", "sample", "expr", "dens")
    for ( i in 1:length(sampleName) ) {
      data.df.selected <- rbind(data.df.selected, data.df[sampleName[i] == data.df$sample, ])
    }
  }
  
  ##### Get min and max values based on the expression data
  den.x <- sort(data.df$expr)
  den.y <- sort(data.df$dens)
  
  ##### Assign colours to distributions
  genes.colour <- getGenesColours(unique(data.df$gene))
  
  ##### Assign colours to samples
  sample.colour <- getSamplesColours(unique(data.df.selected$sample))

  
  ##### Generate interactive density plot
  p <- plotly::plot_ly(data.df, x = ~expr, y = ~dens, type = 'scatter', mode = 'lines', color = ~gene, colors = genes.colour, width = 1000, height = 300) %>%
    plotly::add_markers(y = data.df.selected$dens, x = data.df.selected$expr, 
                name = data.df.selected$sample,
                text = data.df.selected$sample,
                mode = 'markers',
                marker = list(size = 8, colors = data.df.selected$sample, color = rep(sample.colour, each = nrow(data.df.selected)/length(samples)), line = list(color = "grey", width = 2)),
                showlegend = TRUE,
                inherit = FALSE) %>%
     plotly::layout(title = main_title,
           xaxis = list(title = x_title, range = c(den.x[1],den.x[length(den.x)])),
           yaxis = list (title = 'Weigth', range = c(den.y[1],den.y[length(den.y)])))
  
return( p )
}
### Load libraries
suppressMessages(library(preprocessCore))
suppressMessages(library(edgeR))
suppressMessages(library(tidyverse))
suppressMessages(library(package=paste0("EnsDb.Hsapiens.v", params$ensembl_version), character.only = TRUE))
suppressMessages(library(plotly))
suppressMessages(library(scales))
suppressMessages(library(truncnorm))
suppressMessages(library(RColorBrewer))
##### Read in expression data and associated sample annotation files
data <- read.table(paste(params$exprDir, params$exprFile, sep="/"), sep="\t", as.is=TRUE, header=TRUE, row.names = 1)
targetFile <- read.table(paste(params$exprDir, params$annotFile, sep="/"), sep="\t", as.is=TRUE, header=TRUE)[,c(1:4)]

##### Make sure that there are no duplciated samples in the target file
targetFile <- targetFile[!duplicated(targetFile[,"Sample_name"]),]
rownames(targetFile) <- targetFile[,"Sample_name"]

##### Make syntactically valid names
colnames(data) <- make.names(colnames(data))
rownames(targetFile) <- make.names(rownames(targetFile))

##### Make sure that the target file contains info only about samples present in the data matrix
targetFile <- targetFile[ rownames(targetFile) %in% colnames(data),  ]

#### Prepare input samples of interest
samples <- unique(unlist(strsplit(params$samples, split=',', fixed=TRUE)))

##### Change the target file to mark the sample of interest
targetFile$Target[match(params$samples, targetFile$Sample_name)] <- "Sample"

##### Make sure that the samples order in the data matrix is the same as in the target file. If the expression matrix contains data for additional samples then write the data subset (containing only samples from the target file) into a file
if ( !all( colnames(data) %in% rownames(targetFile) , na.rm = FALSE)  ) {
  
  data <- data[ , rownames(targetFile) ]

  ##### Save data subset into a file
  write.table(geneMatrix2write(data), file=paste0(normalizePath(params$exprDir), "/", paste0(params$exprFile, ".subset.txt")), sep="\t", quote=FALSE, row.names=FALSE, append = FALSE)
}

Library size

Bar-plot illustrating library size for each sample.

suppressMessages(library(plotly))
##### Generate bar-plot for library size. The colours indicate sample groups, as provided in *Target* column in the sample annotation file

##### Assigne colours to targets and datasets
targets.colour <- getTargetsColours(targetFile$Target)

##### Prepare data frame
data.df <- data.frame(targetFile$Target, colnames(data), as.numeric(colSums(data)*1e-6))
colnames(data.df) <- c("Group","Sample", "Library_size")

##### The default order will be alphabetized unless specified as below
data.df$Sample <- factor(data.df$Sample, levels = data.df[["Sample"]])
p <- plot_ly(data.df, x = ~Sample, y = ~Library_size, color = ~Group, type = 'bar', colors = targets.colour[[1]], width = 1000, height = 400) %>%
  layout(title = "", xaxis = list( tickfont = list(size = 10), title = ""), yaxis = list(title = "Library size (millions)"), margin = list(l=50, r=50, b=150, t=50, pad=4), autosize = F, legend = list(orientation = 'v', y = 0.5), showlegend=TRUE)

##### Print htmlwidget
p
##### Save the bar-plot as html (PLOTLY)
#htmlwidgets::saveWidget(as_widget(p), paste0(normalizePath(params$exprDir), "/", paste0(params$results_name, "_RNAseq_libSize.html")), selfcontained = TRUE)
##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)
##### Loop through data and annotate genes
##### Convert data into a data frame to make the Ensembl ID and gene symbol matches (with merge function)
data.df <- as.data.frame(cbind(rownames(data), data))
colnames(data.df)[1] <- "ENSEMBL"

##### Get genes annotation and genomic locations
edb <- eval(parse(text = paste0("EnsDb.Hsapiens.v", params$ensembl_version)))
  
##### Get keytypes for gene SYMBOL
keys <- keys(edb, keytype="GENEID")
  
##### Get genes genomic coordiantes
gene_info <- ensembldb::select(edb, keys=keys, columns=c("GENEID", "GENEBIOTYPE", "GENENAME", "SEQNAME", "GENESEQSTART", "GENESEQEND"), keytype="GENEID")
names(gene_info) <- gsub("GENEID", "ENSEMBL", names(gene_info))
names(gene_info) <- gsub("GENENAME", "SYMBOL", names(gene_info))
  
##### Limit genes annotation to those genes for which sample expression measurments are available
gene_info <-  gene_info[ gene_info$ENSEMBL %in% data.df$ENSEMBL,  ]
  
##### Remove rows with duplicated ENSEMBL IDs
gene_info = gene_info[!duplicated(gene_info$ENSEMBL),]
rownames(gene_info) <- gene_info$ENSEMBL
  
##### Remove rows with duplicated gene symbols (Y_RNAs, SNORs, LINC0s etc)
gene_info = gene_info[!duplicated(gene_info$SYMBOL),]
  

##### Merge genes genomic coordinates info with their annotation and expression data
data.annot <- merge(gene_info, data.df, by = "ENSEMBL", all.x = FALSE)
rownames(data.annot) <- data.annot$SYMBOL
  
##### Get data matrix with gene symbols
data <- data.annot[, names(data.annot) %!in% c("SYMBOL", "GENEBIOTYPE", "ENSEMBL", "SEQNAME", "GENESEQSTART", "GENESEQEND")]

Data transformation and filtering

The read count data is converted into CPMs using edgeR functions. Genes with low counts were filtered out. The data is log2-transformed. Plot(s) below present CPM data distribution.

##### Filtering to remove low expressed genes. For differential expression and related analyses, gene expression is rarely considered at the level of raw counts since libraries sequenced at a greater depth will result in higher counts. Rather, it is common practice to transform raw counts onto a scale that accounts for such library size differences. Genes with very low counts across all libraries provide little evidence for differential expression. In the biological point of view, a gene must be expressed at some minimal level before it is likely to be translated into a protein or to be biologically important. In addition, the pronounced discretenes of these counts interferes with some of the statistical approximations that are used later in the pipeline. These genes should be filtered out prior to further analysis. Users should filter with CPM rather than filtering on the counts directly, as the latter does not account for differences in library sizes between samples. For instance for the CPM-transformed data we keep only genes that have CPM of 1
##### Transformation to CPM or TPM scale (see these blogs for details https://www.rna-seqblog.com/rpkm-fpkm-and-tpm-clearly-explained/ and https://haroldpimentel.wordpress.com/2014/05/08/what-the-fpkm-a-review-rna-seq-expression-units/ ).  CPM = Counts Per Million,  TPM = Transcripts Per Kilobase Million. 
##### CPM transformation and filtering
if ( params$filter && params$transform == "CPM" ) {
  
  ##### Create EdgeR DGEList object
  y <- edgeR::DGEList(counts=data,  group=targetFile$Target)
  
  ##### Remove genes with CPM of at least 1 in less than 10% of samples
  #cat("The CPM of 1 (cut-off for removing low expressed genes) corresponds to", round(min(as.numeric(colSums(data)*1e-6)), digits=0), "reads in sample with the lowest sequencing depth, and", round(max(as.numeric(colSums(data)*1e-6)), digits=0), "reads in sample with the greatest sequencing depth\n")
  
  keep <- rowSums(cpm(y)>1) >= ncol(data)/10
  y$filtered <- y[keep, , keep.lib.sizes=FALSE]
  
  #cat(nrow(y$filtered$counts), "genes remained after filtering out of the", nrow(data), "genes in the input read count matrix\n\n")
  
  ##### Transform the raw-scale to CPM. Add small offset to each observation to avoid taking log of zero
  y$samples["norm.factors"] <- edgeR::calcNormFactors(y, method = "none")$samples["norm.factors"]
  y$transformed <- edgeR::cpm(y, normalized.lib.sizes=FALSE, log=params$log, prior.count=0.25)
  y$filtered.transformed <- edgeR::cpm(y$filtered, normalized.lib.sizes=FALSE, log=params$log, prior.count=0.25)
  
##### CPM transformation without filtering
} else if ( !params$filter && params$transform == "CPM" ) {
  
  ##### Create EdgeR DGEList object
  y <- edgeR::DGEList(counts=data,  group=targetFile$Target)
  
  ##### Transform the raw-scale to CPM. Add small offset to each observation to avoid taking log of zero
  y$transformed <- edgeR::cpm(y, normalized.lib.sizes=FALSE, log=params$log, prior.count=0.25)
  
##### TPM data transformation. We can convert RPKM to TPM in two different ways: from pre-calculated RPKM, by diving by the sum of RPKM values, or directly from the normalized counts. Here we calculate TPM starting from RPKM values computed using edgeR's rpkm function ( from http://luisvalesilva.com/datasimple/rna-seq_units.html )
##### TPM transformation with filtering
} else if ( params$filter && params$transform == "TPM" ) {
  
  ##### Get genes lengths
  edb <- eval(parse(text = paste0("EnsDb.Hsapiens.v", params$ensembl_version)))
  
  gene.length <- lengthOf(edb, filter = GeneIdFilter(data.annot$ENSEMBL))
  
  ##### Check for which genes the lenght info is not available and remove them from the data
  genes.no_length <- data.annot$ENSEMBL[ data.annot$ENSEMBL %!in% names(gene.length)]
  data <- data[ data.annot$ENSEMBL %!in% genes.no_length, ]
  
  ##### Create EdgeR DGEList object
  y <- edgeR::DGEList(counts=data,  group=targetFile$Target)
  
  ##### Convert data into RPKM
  y$transformed <- edgeR::rpkm(y, gene.length = gene.length, normalized.lib.sizes=FALSE, log=FALSE)
  
  ##### Remove genes with TPM of at least 0.2 in less than 10% of samples
  keep <- rowSums(y$transformed>0.2) >= ncol(y$transformed)/10
  y$filtered <- y$counts[keep, ]
  y$filtered.transformed <- y$transformed[keep, ]
  
  ##### ... and then to TPM scale. Add small offset to each observation to avoid taking log of zero
  if ( params$log ) {
    
    y$transformed <- log2(tpm_from_rpkm(y$transformed+0.25))
    y$filtered.transformed <- log2(tpm_from_rpkm(y$filtered.transformed+0.25))
  
  } else {
    
    y$transformed <- tpm_from_rpkm(y$transformed)
    y$filtered.transformed <- tpm_from_rpkm(y$filtered.transformed)
  }
  
##### TPM transformation without filtering
} else if ( !params$filter && params$transform == "TPM" ) {
  
  ##### Get genes lengths
  edb <- eval(parse(text = paste0("EnsDb.Hsapiens.v", params$ensembl_version)))
  
  gene.length <- lengthOf(edb, filter = GeneIdFilter(data.annot$ENSEMBL))
  
  ##### Check for which genes the lenght info is not available and remove them from the data
  genes.no_length <- data.annot$ENSEMBL[ data.annot$ENSEMBL %!in% names(gene.length)]
  data <- data[ data.annot$ENSEMBL %!in% genes.no_length, ]
  
  ##### Create EdgeR DGEList object
  y <- edgeR::DGEList(counts=data,  group=targetFile$Target)
  
  ##### Convert data into RPKM
  y$transformed <- edgeR::rpkm(y, gene.length = gene.length, normalized.lib.sizes=FALSE, log=FALSE)
  
  ##### ... and then to TPM scale. Add small offset to each observation to avoid taking log of zero
  if ( params$log ) {
    
    y$transformed <- log2(tpm_from_rpkm(y$transformed+0.25))
  
  } else {
    
    y$transformed <- tpm_from_rpkm(y$transformed)
  }
}
##### Collect the most extreme density values for set the x-axis and y-axis boundaries
den.x <- density(y$transformed[,1])$x
den.y <- density(y$transformed[,1])$y
  
for (i in 2:ncol(y$transformed)) {
    
  den <- density(y$transformed[,i])
  den.x <- sort(c(den.x, den$x))
  den.y <- sort(c(den.y, den$y))
}
  
if ( params$filter ) {

  par(mfrow=c(1,2))
  
  ##### Before filtering
  plot(density(y$transformed[,1]), lwd=2, xlim=c(den.x[1],den.x[length(den.x)]), ylim=c(den.y[1],den.y[length(den.y)]), las=2, main="", xlab="", col=targets.colour[[2]][1])
  title(main="Transformed data (unfiltered)", xlab=params$transform)
  abline(v=0, lty=3)
  
  for (i in 2:ncol(y$transformed)){
    den <- density(y$transformed[,i])
    lines(den$x, den$y, lwd=2, col=targets.colour[[2]][i])
  }
  legend("topright", legend=levels(factor(targetFile$Target)), fill=targets.colour[[1]], bty="n", bg = "transparent")
  
  ##### After filtering
  plot(density(y$filtered.transformed[,1]), lwd=2, xlim=c(den.x[1],den.x[length(den.x)]), ylim=c(den.y[1],den.y[length(den.y)]), las=2, main="", xlab="", col=targets.colour[[2]][1])
  title(main="Transformed and filtered data", xlab=params$transform)
  abline(v=0, lty=3)
  
  for (i in 2:ncol(y$filtered.transformed)){
    den <- density(y$filtered.transformed[,i])
    lines(den$x, den$y, lwd=2, col=targets.colour[[2]][i])
  }
  legend("topright", legend=levels(factor(targetFile$Target)), fill=targets.colour[[1]], bty="n", bg = "transparent")
  
  ##### Save the plot as pdf file
  pdf(paste0(normalizePath(params$exprDir), "/", paste0(params$results_name, "_filtering.pdf")), width=8, height=5)
  par(mfrow=c(1,2))
  
  ##### Before filtering
  plot(density(y$transformed[,1]), lwd=2, xlim=c(den.x[1],den.x[length(den.x)]), ylim=c(den.y[1],den.y[length(den.y)]), las=2, main="", xlab="", col=targets.colour[[2]][1])
  title(main="Transformed data (unfiltered)", xlab=params$transform)
  abline(v=0, lty=3)
  
  for (i in 2:ncol(y$transformed)){
    den <- density(y$transformed[,i])
    lines(den$x, den$y, lwd=2, col=targets.colour[[2]][i])
  }
  legend("topright", legend=levels(factor(targetFile$Target)), fill=targets.colour[[1]], bty="n", bg = "transparent")
  
  ##### After filtering
  plot(density(y$filtered.transformed[,1]), lwd=2, xlim=c(den.x[1],den.x[length(den.x)]), ylim=c(den.y[1],den.y[length(den.y)]), las=2, main="", xlab="", col=targets.colour[[2]][1])
  title(main="Transformed and filtered data", xlab=params$transform)
  abline(v=0, lty=3)
  
  for (i in 2:ncol(y$filtered.transformed)){
    den <- density(y$filtered.transformed[,i])
    lines(den$x, den$y, lwd=2, col=targets.colour[[2]][i])
  }
  legend("topright", legend=levels(factor(targetFile$Target)), fill=targets.colour[[1]], bty="n", bg = "transparent")
  invisible(dev.off())
  
} else {
  
  ##### Without filtering
  plot(density(y$transformed[,1]), lwd=2, xlim=c(den.x[1],den.x[length(den.x)]), ylim=c(den.y[1],den.y[length(den.y)]), las=2, main="", xlab="", col=targets.colour[[2]][1])
  title(main="Transformed data (unfiltered)", xlab=params$transform)
  abline(v=0, lty=3)
  
  for (i in 2:ncol(y$transformed)){
    den <- density(y$transformed[,i])
    lines(den$x, den$y, lwd=2, col=targets.colour[[2]][i])
  }
  legend("topright", legend=levels(factor(targetFile$Target)), fill=targets.colour[[1]], bty="n", bg = "transparent")
  
  ##### Save the plot as pdf file
  pdf(paste0(normalizePath(params$exprDir), "/", paste0(params$results_name, "_filtering.pdf")), width=8, height=5)
  ##### Without filtering
  plot(density(y$transformed[,1]), lwd=2, xlim=c(den.x[1],den.x[length(den.x)]), ylim=c(den.y[1],den.y[length(den.y)]), las=2, main="", xlab="", col=targets.colour[[2]][1])
  title(main="Transformed data (unfiltered)", xlab=params$transform)
  abline(v=0, lty=3)
  
  for (i in 2:ncol(y$transformed)){
    den <- density(y$transformed[,i])
    lines(den$x, den$y, lwd=2, col=targets.colour[[2]][i])
  }
  legend("topright", legend=levels(factor(targetFile$Target)), fill=targets.colour[[1]], bty="n", bg = "transparent")
  invisible(dev.off())
}


Data normalisation

During the sample preparation or sequencing process, external factors that are not of biological interest can affect the expression of individual samples. For example, samples processed in the first batch of an experiment can have higher expression overall when compared to samples processed in a second batch. It is assumed that all samples should have a similar range and distribution of expression values. Normalisation for sample-specific effects is required to ensure that the expression distributions of each sample are similar across the entire experiment. Normalisation is performed using TMM method.

Box-plots below present CPM data for individual samples, coloured by sample groups, before and after TMM normalisation.

##### TMM normalsation. Trimmed mean of M-values (https://www.ncbi.nlm.nih.gov/pubmed/20196867) (TMM) is performed using the calcNormFactors function in edgeR. The normalisation factors calculated here are used as a scaling factor for the library sizes. TMM is the recommended for most RNA-Seq data where the majority (more than half) of the genes are believed not differentially expressed between any pair of the samples. It adjusts for RNA composition effect, calculates scaling factors for the library sizes with calcNormFactors function using trimmed mean of M-values (TMM) between each pair of samples. Note, that the raw read counts are used to calculate the normalisation factors
if ( params$transform == "CPM" ) {
  
  ##### Calculate normalization factors and  transformations from the raw-scale to CPM and normalisation using user-defined method
  if ( params$filter ) {
    
    y$noNorm <- y$filtered.transformed
    y$filtered$samples["norm.factors"] <- edgeR::calcNormFactors(y$filtered, method = params$norm)$samples["norm.factors"]
    y$norm <- edgeR::cpm(y$filtered, normalized.lib.sizes=TRUE, log=params$log, prior.count=0.25)
  
  } else {
    
    y$noNorm <- y$transformed
    y$samples["norm.factors"] <- edgeR::calcNormFactors(y, method = params$norm)$samples["norm.factors"]
    y$norm <- edgeR::cpm(y, normalized.lib.sizes=TRUE, log=params$log, prior.count=0.25)
  }
  
##### Quantile normalsation (from https://www.biostars.org/p/296992/ )
} else if ( params$transform == "TPM" ) {
  
  ##### Normalisation using quantile method
  if ( params$filter ) {
    
    y$noNorm <- y$filtered.transformed
    y$filtered.transformed <- data.matrix(y$filtered.transformed) 
    
    if ( tolower(params$norm) != "none" ) {
      
      y$norm  <- normalize.quantiles(y$filtered.transformed, copy = TRUE)
      colnames(y$norm) <- colnames(y$filtered.transformed)
      rownames(y$norm) <- rownames(y$filtered.transformed)
      
    } else {
      
      y$norm  <- y$filtered.transformed
    }
  
  } else {
    
    y$noNorm <- y$transformed
    y$transformed <- data.matrix(y$transformed)
    
    if ( tolower(params$norm) != "none" ) {
      
      y$norm  <- normalize.quantiles(y$transformed, copy = TRUE)
      colnames(y$norm) <- colnames(y$transformed)
      rownames(y$norm) <- rownames(y$transformed)
      
    } else {
      
      y$norm  <- y$transformed
    }
  }
}

##### Set height scaling factor for plots containing normalised data plots
if ( tolower(params$norm) != "none" ) {
  
  norm.height = 2
} else {
  norm.height = 1
}
##### Plot expression distribution of samples for unnormalised and normalised data
par(mfrow=c(norm.height,1), mar=c((max(nchar(colnames(y$noNorm)))+3)/2, 5, 3, 2))
    
##### Unnormalised data
boxplot(y$noNorm, las=2, col=targets.colour[[2]], main="", pch=".", las=3)
  title(main="Unnormalised data", ylab=params$transform)
legend("topright", legend=levels(factor(targetFile$Target)), fill=targets.colour[[1]], horiz=TRUE, bg = "transparent", box.col="transparent")
    
##### Normalised data
if ( tolower(params$norm) != "none" ) {
      
  boxplot(y$norm, las=2, col=targets.colour[[2]], main="", pch=".", las=3)
  title(main=paste0("Normalised data (", params$norm, ")"), ylab=params$transform)
  legend("topright", legend=levels(factor(targetFile$Target)), fill=targets.colour[[1]], horiz=TRUE, bg = "transparent", box.col="transparent")
}

##### Save the plot as pdf file
pdf(paste0(normalizePath(params$exprDir), "/", paste0(params$results_name, "_normalisation.pdf")), width=8, height=14)
par(mfrow=c(norm.height,1), mar=c(max(nchar(colnames(y$noNorm)))/2, 5, 3, 2))
    
##### Unnormalised data
boxplot(y$noNorm, las=2, col=targets.colour[[2]], main="", pch=".", las=3)
title(main="Unnormalised data", ylab=params$transform)
    legend("topright", legend=levels(factor(targetFile$Target)), fill=targets.colour[[1]], horiz=TRUE, bg = "transparent", box.col="transparent")
      
##### Normalised data
if ( tolower(params$norm) != "none" ) {
      
  boxplot(y$norm, las=2, col=targets.colour[[2]], main="", pch=".", las=3)
  title(main=paste0("Normalised data (", params$norm, ")"), ylab=params$transform)
    legend("topright", legend=levels(factor(targetFile$Target)), fill=targets.colour[[1]], horiz=TRUE, bg = "transparent",box.col="transparent")
}
invisible(dev.off())

Z-score transformation

This step aims to put data from different sources onto the same scale. Z-scores, or standard scores, indicate how many standard deviations an observation is above or below the mean. The z-score reflects a standard normal deviate - the variation of across the standard normal distribution, which is a normal distribution with mean equal to zero and standard deviation equal to one.

Box-plot below present z-score transformed data for individual samples, coloured according to corresponding groups.

##### Perform Z-score transformation
if ( tolower(params$norm) == "none" ) {
  
    y$z <- scale(y$norm)
    
} else {
    y$z <- scale(y$noNorm)
}
##### Plot expression distribution of samples for unnormalised and normalised data
par(mfrow=c(1,1), mar=c((max(nchar(colnames(y$z)))+3)/2, 5, 3, 2))
    
boxplot(y$z, las=2, col=targets.colour[[2]], main="", pch=".", las=3)
  title(main="Z-score transformed data", ylab="Z-score")
legend("topright", legend=levels(factor(targetFile$Target)), fill=targets.colour[[1]], horiz=TRUE, bg = "transparent", box.col="transparent")

##### Save the plot as pdf file
pdf(paste0(normalizePath(params$exprDir), "/", paste0(params$results_name, "_z_scores.pdf")), width=8, height=7)
par(mfrow=c(1,1), mar=c(max(nchar(colnames(y$z)))/2, 5, 3, 2))
    
boxplot(y$z, las=2, col=targets.colour[[2]], main="", pch=".", las=3)
title(main="Z-score transformed data", ylab="Z-score")
    legend("topright", legend=levels(factor(targetFile$Target)), fill=targets.colour[[1]], horiz=TRUE, bg = "transparent", box.col="transparent")
invisible(dev.off())

Data processing effects

Scatter-plots illustrating the effect of data normalisation and z-score transformation on the CPM data. The CPM and z-score data, respectively, are plotted as a function of normalised CPM (TMM) values.

##### Subset data to include only user-defined genes
genes <- unique(unlist(strsplit(params$genes, split=',', fixed=TRUE)))
y.sub <- y

##### Identify user-defined genes that were not present in the expression matrix
if ( params$filter ) {
    
  genes.missing <- genes[ genes %!in% rownames(y$filtered.transformed) ]
  genes <- genes[ genes %in% rownames(y$filtered.transformed)  ]
    
  keep <- rownames(y$filtered.transformed) %in% genes
  y.sub$filtered.transformed <- y$filtered.transformed[keep, , drop=FALSE]
    
} else {
    
  genes.missing <- genes[ genes %!in% rownames(y$transformed) ]
  genes <- genes[ genes %in% rownames(y$transformed)  ]
    
  keep <- rownames(y$transformed) %in% genes
  y.sub$transformed <- y.sub$transformed[keep, , drop=FALSE]
}

y.sub$noNorm <- y.sub$noNorm[keep, , drop=FALSE]
y.sub$z <- y.sub$z[keep, , drop=FALSE]
  
if ( tolower(params$norm) != "none" ) {
  y.sub$norm <- y.sub$norm[keep, , drop=FALSE]
}

##### Write list of genes into a file
if ( length(genes) > 0 ) {
    write.table(prepare2write(genes), file = paste0(normalizePath(params$exprDir), "/", paste0(params$results_name, ".genes.txt")), sep="\t", quote=FALSE, row.names=TRUE, append = FALSE )
  
  runChunk <- TRUE
} else {
  runChunk <- FALSE
}

##### Write list of missing genes into a file
if ( exists("genes.missing") && length(genes.missing) > 0 ) {
  
  write.table(prepare2write(genes.missing), file = paste0(normalizePath(params$exprDir), "/", paste0(params$results_name, ".genes.missing.txt")), sep="\t", quote=FALSE, row.names=TRUE, append = FALSE )
}

KRAS

##### Create a list for htmlwidgets
widges.list <- htmltools::tagList()

gene <- genes[1]
plotsNo <- 1

##### Plot expression distribution in transfromed and normalised data for selected genes
if ( !is.na(gene) ) {
  
  if ( params$filter ) {
    transformed.data <- y.sub$filtered.transformed
  } else {
    transformed.data <- y.sub$transformed
  }
  
  ##### For normalisation ON
  if ( tolower(params$norm) != "none" ) {
    
    ##### For normalisation ON
    ##### Check the effect of data Z-score transformation 
    widges.list[[plotsNo]] <- comparPlot(genes=gene, dataset1=y.sub$norm, dataset2=transformed.data, main_title="", x_title=paste0("Normalised ", params$transform, " (", params$norm, ")"), y_title=params$transform, sampleName=samples )
      
    ##### Check the effect of data Z-score transformation 
    widges.list[[plotsNo+1]] <- comparPlot(genes=gene, dataset1=y.sub$norm, dataset2=y.sub$z, main_title="", x_title=paste0("Normalised ", params$transform, " (", params$norm, ")"), y_title="Z-score", sampleName=samples )
      
  ##### For normalisation OFF
  } else {
    
    ##### Check the effect of data Z-score transformation 
    widges.list[[plotsNo]] <- comparPlot(genes=gene, dataset1=transformed.data, dataset2=y.sub$z, main_title="Z-score transformation", x_title=params$transform, y_title="Z-score", sampleName=samples )
  }
}

##### Print a list of htmlwidgets
widges.list

CDKN2A

##### Create a list for htmlwidgets
widges.list <- htmltools::tagList()

gene <- genes[2]
plotsNo <- 1

##### Plot expression distribution in transfromed and normalised data for selected genes
if ( !is.na(gene) ) {
  
  if ( params$filter ) {
    transformed.data <- y.sub$filtered.transformed
  } else {
    transformed.data <- y.sub$transformed
  }
  
  ##### For normalisation ON
  if ( tolower(params$norm) != "none" ) {
    
    ##### For normalisation ON
    ##### Check the effect of data Z-score transformation 
    widges.list[[plotsNo]] <- comparPlot(genes=gene, dataset1=y.sub$norm, dataset2=transformed.data, main_title="", x_title=paste0("Normalised ", params$transform, " (", params$norm, ")"), y_title=params$transform, sampleName=samples )
      
    ##### Check the effect of data Z-score transformation 
    widges.list[[plotsNo+1]] <- comparPlot(genes=gene, dataset1=y.sub$norm, dataset2=y.sub$z, main_title="", x_title=paste0("Normalised ", params$transform, " (", params$norm, ")"), y_title="Z-score", sampleName=samples )
      
  ##### For normalisation OFF
  } else {
    
    ##### Check the effect of data Z-score transformation 
    widges.list[[plotsNo]] <- comparPlot(genes=gene, dataset1=transformed.data, dataset2=y.sub$z, main_title="Z-score transformation", x_title=params$transform, y_title="Z-score", sampleName=samples )
  }
}

##### Print a list of htmlwidgets
widges.list

TP53

##### Create a list for htmlwidgets
widges.list <- htmltools::tagList()

gene <- genes[3]
plotsNo <- 1

##### Plot expression distribution in transfromed and normalised data for selected genes
if ( !is.na(gene) ) {
  
  if ( params$filter ) {
    transformed.data <- y.sub$filtered.transformed
  } else {
    transformed.data <- y.sub$transformed
  }
  
  ##### For normalisation ON
  if ( tolower(params$norm) != "none" ) {
    
    ##### For normalisation ON
    ##### Check the effect of data Z-score transformation 
    widges.list[[plotsNo]] <- comparPlot(genes=gene, dataset1=y.sub$norm, dataset2=transformed.data, main_title="", x_title=paste0("Normalised ", params$transform, " (", params$norm, ")"), y_title=params$transform, sampleName=samples )
      
    ##### Check the effect of data Z-score transformation 
    widges.list[[plotsNo+1]] <- comparPlot(genes=gene, dataset1=y.sub$norm, dataset2=y.sub$z, main_title="", x_title=paste0("Normalised ", params$transform, " (", params$norm, ")"), y_title="Z-score", sampleName=samples )
      
  ##### For normalisation OFF
  } else {
    
    ##### Check the effect of data Z-score transformation 
    widges.list[[plotsNo]] <- comparPlot(genes=gene, dataset1=transformed.data, dataset2=y.sub$z, main_title="Z-score transformation", x_title=params$transform, y_title="Z-score", sampleName=samples )
  }
}

##### Print a list of htmlwidgets
widges.list

SMAD4

##### Create a list for htmlwidgets
widges.list <- htmltools::tagList()

gene <- genes[4]
plotsNo <- 1

##### Plot expression distribution in transfromed and normalised data for selected genes
if ( !is.na(gene) ) {
  
  if ( params$filter ) {
    transformed.data <- y.sub$filtered.transformed
  } else {
    transformed.data <- y.sub$transformed
  }
  
  ##### For normalisation ON
  if ( tolower(params$norm) != "none" ) {
    
    ##### For normalisation ON
    ##### Check the effect of data Z-score transformation 
    widges.list[[plotsNo]] <- comparPlot(genes=gene, dataset1=y.sub$norm, dataset2=transformed.data, main_title="", x_title=paste0("Normalised ", params$transform, " (", params$norm, ")"), y_title=params$transform, sampleName=samples )
      
    ##### Check the effect of data Z-score transformation 
    widges.list[[plotsNo+1]] <- comparPlot(genes=gene, dataset1=y.sub$norm, dataset2=y.sub$z, main_title="", x_title=paste0("Normalised ", params$transform, " (", params$norm, ")"), y_title="Z-score", sampleName=samples )
      
  ##### For normalisation OFF
  } else {
    
    ##### Check the effect of data Z-score transformation 
    widges.list[[plotsNo]] <- comparPlot(genes=gene, dataset1=transformed.data, dataset2=y.sub$z, main_title="Z-score transformation", x_title=params$transform, y_title="Z-score", sampleName=samples )
  }
}

##### Print a list of htmlwidgets
widges.list

##### Create a list for htmlwidgets
widges.list <- htmltools::tagList()

gene <- genes[5]
plotsNo <- 1

##### Plot expression distribution in transfromed and normalised data for selected genes
if ( !is.na(gene) ) {
  
  if ( params$filter ) {
    transformed.data <- y.sub$filtered.transformed
  } else {
    transformed.data <- y.sub$transformed
  }
  
  ##### For normalisation ON
  if ( tolower(params$norm) != "none" ) {
    
    ##### For normalisation ON
    ##### Check the effect of data Z-score transformation 
    widges.list[[plotsNo]] <- comparPlot(genes=gene, dataset1=y.sub$norm, dataset2=transformed.data, main_title="", x_title=paste0("Normalised ", params$transform, " (", params$norm, ")"), y_title=params$transform, sampleName=samples )
      
    ##### Check the effect of data Z-score transformation 
    widges.list[[plotsNo+1]] <- comparPlot(genes=gene, dataset1=y.sub$norm, dataset2=y.sub$z, main_title="", x_title=paste0("Normalised ", params$transform, " (", params$norm, ")"), y_title="Z-score", sampleName=samples )
      
  ##### For normalisation OFF
  } else {
    
    ##### Check the effect of data Z-score transformation 
    widges.list[[plotsNo]] <- comparPlot(genes=gene, dataset1=transformed.data, dataset2=y.sub$z, main_title="Z-score transformation", x_title=params$transform, y_title="Z-score", sampleName=samples )
  }
}

##### Print a list of htmlwidgets
widges.list

##### Create a list for htmlwidgets
widges.list <- htmltools::tagList()

gene <- genes[6]
plotsNo <- 1

##### Plot expression distribution in transfromed and normalised data for selected genes
if ( !is.na(gene) ) {
  
  if ( params$filter ) {
    transformed.data <- y.sub$filtered.transformed
  } else {
    transformed.data <- y.sub$transformed
  }
  
  ##### For normalisation ON
  if ( tolower(params$norm) != "none" ) {
    
    ##### For normalisation ON
    ##### Check the effect of data Z-score transformation 
    widges.list[[plotsNo]] <- comparPlot(genes=gene, dataset1=y.sub$norm, dataset2=transformed.data, main_title="", x_title=paste0("Normalised ", params$transform, " (", params$norm, ")"), y_title=params$transform, sampleName=samples )
      
    ##### Check the effect of data Z-score transformation 
    widges.list[[plotsNo+1]] <- comparPlot(genes=gene, dataset1=y.sub$norm, dataset2=y.sub$z, main_title="", x_title=paste0("Normalised ", params$transform, " (", params$norm, ")"), y_title="Z-score", sampleName=samples )
      
  ##### For normalisation OFF
  } else {
    
    ##### Check the effect of data Z-score transformation 
    widges.list[[plotsNo]] <- comparPlot(genes=gene, dataset1=transformed.data, dataset2=y.sub$z, main_title="Z-score transformation", x_title=params$transform, y_title="Z-score", sampleName=samples )
  }
}

##### Print a list of htmlwidgets
widges.list

##### Create a list for htmlwidgets
widges.list <- htmltools::tagList()

gene <- genes[7]
plotsNo <- 1

##### Plot expression distribution in transfromed and normalised data for selected genes
if ( !is.na(gene) ) {
  
  if ( params$filter ) {
    transformed.data <- y.sub$filtered.transformed
  } else {
    transformed.data <- y.sub$transformed
  }
  
  ##### For normalisation ON
  if ( tolower(params$norm) != "none" ) {
    
    ##### For normalisation ON
    ##### Check the effect of data Z-score transformation 
    widges.list[[plotsNo]] <- comparPlot(genes=gene, dataset1=y.sub$norm, dataset2=transformed.data, main_title="", x_title=paste0("Normalised ", params$transform, " (", params$norm, ")"), y_title=params$transform, sampleName=samples )
      
    ##### Check the effect of data Z-score transformation 
    widges.list[[plotsNo+1]] <- comparPlot(genes=gene, dataset1=y.sub$norm, dataset2=y.sub$z, main_title="", x_title=paste0("Normalised ", params$transform, " (", params$norm, ")"), y_title="Z-score", sampleName=samples )
      
  ##### For normalisation OFF
  } else {
    
    ##### Check the effect of data Z-score transformation 
    widges.list[[plotsNo]] <- comparPlot(genes=gene, dataset1=transformed.data, dataset2=y.sub$z, main_title="Z-score transformation", x_title=params$transform, y_title="Z-score", sampleName=samples )
  }
}

##### Print a list of htmlwidgets
widges.list

##### Create a list for htmlwidgets
widges.list <- htmltools::tagList()

gene <- genes[8]
plotsNo <- 1

##### Plot expression distribution in transfromed and normalised data for selected genes
if ( !is.na(gene) ) {
  
  if ( params$filter ) {
    transformed.data <- y.sub$filtered.transformed
  } else {
    transformed.data <- y.sub$transformed
  }
  
  ##### For normalisation ON
  if ( tolower(params$norm) != "none" ) {
    
    ##### For normalisation ON
    ##### Check the effect of data Z-score transformation 
    widges.list[[plotsNo]] <- comparPlot(genes=gene, dataset1=y.sub$norm, dataset2=transformed.data, main_title="", x_title=paste0("Normalised ", params$transform, " (", params$norm, ")"), y_title=params$transform, sampleName=samples )
      
    ##### Check the effect of data Z-score transformation 
    widges.list[[plotsNo+1]] <- comparPlot(genes=gene, dataset1=y.sub$norm, dataset2=y.sub$z, main_title="", x_title=paste0("Normalised ", params$transform, " (", params$norm, ")"), y_title="Z-score", sampleName=samples )
      
  ##### For normalisation OFF
  } else {
    
    ##### Check the effect of data Z-score transformation 
    widges.list[[plotsNo]] <- comparPlot(genes=gene, dataset1=transformed.data, dataset2=y.sub$z, main_title="Z-score transformation", x_title=params$transform, y_title="Z-score", sampleName=samples )
  }
}

##### Print a list of htmlwidgets
widges.list

##### Create a list for htmlwidgets
widges.list <- htmltools::tagList()

gene <- genes[9]
plotsNo <- 1

##### Plot expression distribution in transfromed and normalised data for selected genes
if ( !is.na(gene) ) {
  
  if ( params$filter ) {
    transformed.data <- y.sub$filtered.transformed
  } else {
    transformed.data <- y.sub$transformed
  }
  
  ##### For normalisation ON
  if ( tolower(params$norm) != "none" ) {
    
    ##### For normalisation ON
    ##### Check the effect of data Z-score transformation 
    widges.list[[plotsNo]] <- comparPlot(genes=gene, dataset1=y.sub$norm, dataset2=transformed.data, main_title="", x_title=paste0("Normalised ", params$transform, " (", params$norm, ")"), y_title=params$transform, sampleName=samples )
      
    ##### Check the effect of data Z-score transformation 
    widges.list[[plotsNo+1]] <- comparPlot(genes=gene, dataset1=y.sub$norm, dataset2=y.sub$z, main_title="", x_title=paste0("Normalised ", params$transform, " (", params$norm, ")"), y_title="Z-score", sampleName=samples )
      
  ##### For normalisation OFF
  } else {
    
    ##### Check the effect of data Z-score transformation 
    widges.list[[plotsNo]] <- comparPlot(genes=gene, dataset1=transformed.data, dataset2=y.sub$z, main_title="Z-score transformation", x_title=params$transform, y_title="Z-score", sampleName=samples )
  }
}

##### Print a list of htmlwidgets
widges.list

##### Create a list for htmlwidgets
widges.list <- htmltools::tagList()

gene <- genes[10]
plotsNo <- 1

##### Plot expression distribution in transfromed and normalised data for selected genes
if ( !is.na(gene) ) {
  
  if ( params$filter ) {
    transformed.data <- y.sub$filtered.transformed
  } else {
    transformed.data <- y.sub$transformed
  }
  
  ##### For normalisation ON
  if ( tolower(params$norm) != "none" ) {
    
    ##### For normalisation ON
    ##### Check the effect of data Z-score transformation 
    widges.list[[plotsNo]] <- comparPlot(genes=gene, dataset1=y.sub$norm, dataset2=transformed.data, main_title="", x_title=paste0("Normalised ", params$transform, " (", params$norm, ")"), y_title=params$transform, sampleName=samples )
      
    ##### Check the effect of data Z-score transformation 
    widges.list[[plotsNo+1]] <- comparPlot(genes=gene, dataset1=y.sub$norm, dataset2=y.sub$z, main_title="", x_title=paste0("Normalised ", params$transform, " (", params$norm, ")"), y_title="Z-score", sampleName=samples )
      
  ##### For normalisation OFF
  } else {
    
    ##### Check the effect of data Z-score transformation 
    widges.list[[plotsNo]] <- comparPlot(genes=gene, dataset1=transformed.data, dataset2=y.sub$z, main_title="Z-score transformation", x_title=params$transform, y_title="Z-score", sampleName=samples )
  }
}

##### Print a list of htmlwidgets
widges.list

Expression distribution patterns

Plots

Plot(s) illustrating CPM, normalised CPM (TMM) and z-score transformated data distributions for selected gene(s) along with simulated normal, binomial (p=0.25 and p=0.75) and bimodal distributions (computed based on the input data).


KRAS

##### Create a list for htmlwidgets
widges.list <- htmltools::tagList()

gene <- genes[1]
plotsNo <- 1

##### Plot expression distribution in transfromed and normalised data for selected genes
if ( !is.na(gene) ) {
  
  if ( params$filter ) {
    transformed.data <- y.sub$filtered.transformed
  } else {
    transformed.data <- y.sub$transformed
  }
    
  widges.list[[plotsNo]] <- densityPlot(genes=gene, data=transformed.data, main_title="", x_title=params$transform, sampleName=samples, distributions = c("normal", "binomial",  "bimodal") )
  
  ##### Normalised data 
  if ( tolower(params$norm) != "none" ) {
    
    plotsNo <- plotsNo + 1
    
    #### Interactive gene distribution plot
    widges.list[[plotsNo]] <- densityPlot(genes=gene, data=y.sub$norm, main_title="", x_title=paste0("Normalised ", params$transform, " (", params$norm, ")"), sampleName=samples, distributions = c("normal", "binomial", "bimodal") ) 
  }
  widges.list[[plotsNo+1]] <- densityPlot(genes=gene, data=y.sub$z, main_title="", x_title="Z-score", sampleName=samples, distributions = c("normal", "binomial",  "bimodal") )
}

##### Print a list of htmlwidgets
widges.list

CDKN2A

##### Create a list for htmlwidgets
widges.list <- htmltools::tagList()

gene <- genes[2]
plotsNo <- 1

##### Plot expression distribution in transfromed and normalised data for selected genes
if ( !is.na(gene) ) {
  
  if ( params$filter ) {
    transformed.data <- y.sub$filtered.transformed
  } else {
    transformed.data <- y.sub$transformed
  }
    
  widges.list[[plotsNo]] <- densityPlot(genes=gene, data=transformed.data, main_title="", x_title=params$transform, sampleName=samples, distributions = c("normal", "binomial",  "bimodal") )
  
  ##### Normalised data 
  if ( tolower(params$norm) != "none" ) {
    
    plotsNo <- plotsNo + 1
    
    #### Interactive gene distribution plot
    widges.list[[plotsNo]] <- densityPlot(genes=gene, data=y.sub$norm, main_title="", x_title=paste0("Normalised ", params$transform, " (", params$norm, ")"), sampleName=samples, distributions = c("normal", "binomial", "bimodal") ) 
  }
  widges.list[[plotsNo+1]] <- densityPlot(genes=gene, data=y.sub$z, main_title="", x_title="Z-score", sampleName=samples, distributions = c("normal", "binomial",  "bimodal") )
}

##### Print a list of htmlwidgets
widges.list

TP53

##### Create a list for htmlwidgets
widges.list <- htmltools::tagList()

gene <- genes[3]
plotsNo <- 1

##### Plot expression distribution in transfromed and normalised data for selected genes
if ( !is.na(gene) ) {
  
  if ( params$filter ) {
    transformed.data <- y.sub$filtered.transformed
  } else {
    transformed.data <- y.sub$transformed
  }
    
  widges.list[[plotsNo]] <- densityPlot(genes=gene, data=transformed.data, main_title="", x_title=params$transform, sampleName=samples, distributions = c("normal", "binomial",  "bimodal") )
  
  ##### Normalised data 
  if ( tolower(params$norm) != "none" ) {
    
    plotsNo <- plotsNo + 1
    
    #### Interactive gene distribution plot
    widges.list[[plotsNo]] <- densityPlot(genes=gene, data=y.sub$norm, main_title="", x_title=paste0("Normalised ", params$transform, " (", params$norm, ")"), sampleName=samples, distributions = c("normal", "binomial", "bimodal") ) 
  }
  widges.list[[plotsNo+1]] <- densityPlot(genes=gene, data=y.sub$z, main_title="", x_title="Z-score", sampleName=samples, distributions = c("normal", "binomial",  "bimodal") )
}

##### Print a list of htmlwidgets
widges.list

SMAD4

##### Create a list for htmlwidgets
widges.list <- htmltools::tagList()

gene <- genes[4]
plotsNo <- 1

##### Plot expression distribution in transfromed and normalised data for selected genes
if ( !is.na(gene) ) {
  
  if ( params$filter ) {
    transformed.data <- y.sub$filtered.transformed
  } else {
    transformed.data <- y.sub$transformed
  }
    
  widges.list[[plotsNo]] <- densityPlot(genes=gene, data=transformed.data, main_title="", x_title=params$transform, sampleName=samples, distributions = c("normal", "binomial",  "bimodal") )
  
  ##### Normalised data 
  if ( tolower(params$norm) != "none" ) {
    
    plotsNo <- plotsNo + 1
    
    #### Interactive gene distribution plot
    widges.list[[plotsNo]] <- densityPlot(genes=gene, data=y.sub$norm, main_title="", x_title=paste0("Normalised ", params$transform, " (", params$norm, ")"), sampleName=samples, distributions = c("normal", "binomial", "bimodal") ) 
  }
  widges.list[[plotsNo+1]] <- densityPlot(genes=gene, data=y.sub$z, main_title="", x_title="Z-score", sampleName=samples, distributions = c("normal", "binomial",  "bimodal") )
}

##### Print a list of htmlwidgets
widges.list

##### Create a list for htmlwidgets
widges.list <- htmltools::tagList()

gene <- genes[5]
plotsNo <- 1

##### Plot expression distribution in transfromed and normalised data for selected genes
if ( !is.na(gene) ) {
  
  if ( params$filter ) {
    transformed.data <- y.sub$filtered.transformed
  } else {
    transformed.data <- y.sub$transformed
  }
    
  widges.list[[plotsNo]] <- densityPlot(genes=gene, data=transformed.data, main_title="", x_title=params$transform, sampleName=samples, distributions = c("normal", "binomial",  "bimodal") )
  
  ##### Normalised data 
  if ( tolower(params$norm) != "none" ) {
    
    plotsNo <- plotsNo + 1
    
    #### Interactive gene distribution plot
    widges.list[[plotsNo]] <- densityPlot(genes=gene, data=y.sub$norm, main_title="", x_title=paste0("Normalised ", params$transform, " (", params$norm, ")"), sampleName=samples, distributions = c("normal", "binomial", "bimodal") ) 
  }
  widges.list[[plotsNo+1]] <- densityPlot(genes=gene, data=y.sub$z, main_title="", x_title="Z-score", sampleName=samples, distributions = c("normal", "binomial",  "bimodal") )
}

##### Print a list of htmlwidgets
widges.list

##### Create a list for htmlwidgets
widges.list <- htmltools::tagList()

gene <- genes[6]
plotsNo <- 1

##### Plot expression distribution in transfromed and normalised data for selected genes
if ( !is.na(gene) ) {
  
  if ( params$filter ) {
    transformed.data <- y.sub$filtered.transformed
  } else {
    transformed.data <- y.sub$transformed
  }
    
  widges.list[[plotsNo]] <- densityPlot(genes=gene, data=transformed.data, main_title="", x_title=params$transform, sampleName=samples, distributions = c("normal", "binomial",  "bimodal") )
  
  ##### Normalised data 
  if ( tolower(params$norm) != "none" ) {
    
    plotsNo <- plotsNo + 1
    
    #### Interactive gene distribution plot
    widges.list[[plotsNo]] <- densityPlot(genes=gene, data=y.sub$norm, main_title="", x_title=paste0("Normalised ", params$transform, " (", params$norm, ")"), sampleName=samples, distributions = c("normal", "binomial", "bimodal") ) 
  }
  widges.list[[plotsNo+1]] <- densityPlot(genes=gene, data=y.sub$z, main_title="", x_title="Z-score", sampleName=samples, distributions = c("normal", "binomial",  "bimodal") )
}

##### Print a list of htmlwidgets
widges.list

##### Create a list for htmlwidgets
widges.list <- htmltools::tagList()

gene <- genes[7]
plotsNo <- 1

##### Plot expression distribution in transfromed and normalised data for selected genes
if ( !is.na(gene) ) {
  
  if ( params$filter ) {
    transformed.data <- y.sub$filtered.transformed
  } else {
    transformed.data <- y.sub$transformed
  }
    
  widges.list[[plotsNo]] <- densityPlot(genes=gene, data=transformed.data, main_title="", x_title=params$transform, sampleName=samples, distributions = c("normal", "binomial",  "bimodal") )
  
  ##### Normalised data 
  if ( tolower(params$norm) != "none" ) {
    
    plotsNo <- plotsNo + 1
    
    #### Interactive gene distribution plot
    widges.list[[plotsNo]] <- densityPlot(genes=gene, data=y.sub$norm, main_title="", x_title=paste0("Normalised ", params$transform, " (", params$norm, ")"), sampleName=samples, distributions = c("normal", "binomial", "bimodal") ) 
  }
  widges.list[[plotsNo+1]] <- densityPlot(genes=gene, data=y.sub$z, main_title="", x_title="Z-score", sampleName=samples, distributions = c("normal", "binomial",  "bimodal") )
}

##### Print a list of htmlwidgets
widges.list

##### Create a list for htmlwidgets
widges.list <- htmltools::tagList()

gene <- genes[8]
plotsNo <- 1

##### Plot expression distribution in transfromed and normalised data for selected genes
if ( !is.na(gene) ) {
  
  if ( params$filter ) {
    transformed.data <- y.sub$filtered.transformed
  } else {
    transformed.data <- y.sub$transformed
  }
    
  widges.list[[plotsNo]] <- densityPlot(genes=gene, data=transformed.data, main_title="", x_title=params$transform, sampleName=samples, distributions = c("normal", "binomial",  "bimodal") )
  
  ##### Normalised data 
  if ( tolower(params$norm) != "none" ) {
    
    plotsNo <- plotsNo + 1
    
    #### Interactive gene distribution plot
    widges.list[[plotsNo]] <- densityPlot(genes=gene, data=y.sub$norm, main_title="", x_title=paste0("Normalised ", params$transform, " (", params$norm, ")"), sampleName=samples, distributions = c("normal", "binomial", "bimodal") ) 
  }
  widges.list[[plotsNo+1]] <- densityPlot(genes=gene, data=y.sub$z, main_title="", x_title="Z-score", sampleName=samples, distributions = c("normal", "binomial",  "bimodal") )
}

##### Print a list of htmlwidgets
widges.list

##### Create a list for htmlwidgets
widges.list <- htmltools::tagList()

gene <- genes[9]
plotsNo <- 1

##### Plot expression distribution in transfromed and normalised data for selected genes
if ( !is.na(gene) ) {
  
  if ( params$filter ) {
    transformed.data <- y.sub$filtered.transformed
  } else {
    transformed.data <- y.sub$transformed
  }
    
  widges.list[[plotsNo]] <- densityPlot(genes=gene, data=transformed.data, main_title="", x_title=params$transform, sampleName=samples, distributions = c("normal", "binomial",  "bimodal") )
  
  ##### Normalised data 
  if ( tolower(params$norm) != "none" ) {
    
    plotsNo <- plotsNo + 1
    
    #### Interactive gene distribution plot
    widges.list[[plotsNo]] <- densityPlot(genes=gene, data=y.sub$norm, main_title="", x_title=paste0("Normalised ", params$transform, " (", params$norm, ")"), sampleName=samples, distributions = c("normal", "binomial", "bimodal") ) 
  }
  widges.list[[plotsNo+1]] <- densityPlot(genes=gene, data=y.sub$z, main_title="", x_title="Z-score", sampleName=samples, distributions = c("normal", "binomial",  "bimodal") )
}

##### Print a list of htmlwidgets
widges.list

##### Create a list for htmlwidgets
widges.list <- htmltools::tagList()

gene <- genes[10]
plotsNo <- 1

##### Plot expression distribution in transfromed and normalised data for selected genes
if ( !is.na(gene) ) {
  
  if ( params$filter ) {
    transformed.data <- y.sub$filtered.transformed
  } else {
    transformed.data <- y.sub$transformed
  }
    
  widges.list[[plotsNo]] <- densityPlot(genes=gene, data=transformed.data, main_title="", x_title=params$transform, sampleName=samples, distributions = c("normal", "binomial",  "bimodal") )
  
  ##### Normalised data 
  if ( tolower(params$norm) != "none" ) {
    
    plotsNo <- plotsNo + 1
    
    #### Interactive gene distribution plot
    widges.list[[plotsNo]] <- densityPlot(genes=gene, data=y.sub$norm, main_title="", x_title=paste0("Normalised ", params$transform, " (", params$norm, ")"), sampleName=samples, distributions = c("normal", "binomial", "bimodal") ) 
  }
  widges.list[[plotsNo+1]] <- densityPlot(genes=gene, data=y.sub$z, main_title="", x_title="Z-score", sampleName=samples, distributions = c("normal", "binomial",  "bimodal") )
}

##### Print a list of htmlwidgets
widges.list

Table

Table with CPM data values for each sample for selected genes.

Transformed data

##### Generate a table with expression values for selected genes
if ( params$filter ) {
  
 if ( nrow(y.sub$filtered.transformed) < 11 ) {
    
    DT::datatable( data = round(t(y.sub$filtered.transformed[genes,]), digits = 1), filter="none", rownames = TRUE, extensions = c('Buttons','Scroller'), options = list(pageLength = 10, dom = 'Bfrtip', buttons = c('excel', 'csv', 'pdf','copy','colvis'), scrollX = TRUE, deferRender = TRUE, scrollY = 200, scroller = TRUE), width = 800, caption = htmltools::tags$caption( style = 'caption-side: top; text-align: left; color:grey; font-size:100% ;'), escape = FALSE) %>%
      DT::formatStyle( columns = names(t(y.sub$filtered.transformed)), `font-size` = '12px', 'text-align' = 'center' )
    
  } else {
    cat(paste0(nrow(y.sub$filtered.transformed), " genes were selected. The tables is available for not more than 10 genes!"))
  }

} else {
  
  if ( nrow(y.sub$transformed) < 11 ) {
    
    DT::datatable( data = round(t(y.sub$transformed[genes,]), digits = 1), filter="none", rownames = TRUE, extensions = c('Buttons','Scroller'), options = list(pageLength = 10, dom = 'Bfrtip', buttons = c('excel', 'csv', 'pdf','copy','colvis'), scrollX = TRUE, deferRender = TRUE, scrollY = 200, scroller = TRUE), width = 800, caption = htmltools::tags$caption( style = 'caption-side: top; text-align: left; color:grey; font-size:100% ;'), escape = FALSE) %>%
      DT::formatStyle( columns = names(t(y.sub$transformed)), `font-size` = '12px', 'text-align' = 'center' )
    
  } else {
    cat(paste0(nrow(y.sub$transformed), " genes were selected. The tables is available for not more than 10 genes!"))
  }
}

Normalised data

##### Generate a table with expression values for selected genes
if ( tolower(params$norm) != "none" ) {
  
  if ( nrow(y.sub$norm) < 11 ) {
    
    DT::datatable( data = round(t(y.sub$norm[genes,]), digits = 1), filter="none", rownames = TRUE, extensions = c('Buttons','Scroller'), options = list(pageLength = 10, dom = 'Bfrtip', buttons = c('excel', 'csv', 'pdf','copy','colvis'), scrollX = TRUE, deferRender = TRUE, scrollY = 200, scroller = TRUE), width = 800, caption = htmltools::tags$caption( style = 'caption-side: top; text-align: left; color:grey; font-size:100% ;'), escape = FALSE) %>%
      DT::formatStyle( columns = names(t(y.sub$norm)), `font-size` = '12px', 'text-align' = 'center' )
    
  } else {
    cat(paste0(nrow(y.sub$norm), " genes were selected. The tables is available for not more than 10 genes!"))
  }
}

Z-score data

##### Generate a table with expression values for selected genes
if ( nrow(y.sub$z) < 11 ) {
    
  DT::datatable( data = round(t(y.sub$z[genes,]), digits = 1), filter="none", rownames = TRUE, extensions = c('Buttons','Scroller'), options = list(pageLength = 10, dom = 'Bfrtip', buttons = c('excel', 'csv', 'pdf','copy','colvis'), scrollX = TRUE, deferRender = TRUE, scrollY = 200, scroller = TRUE), width = 800, caption = htmltools::tags$caption( style = 'caption-side: top; text-align: left; color:grey; font-size:100% ;'), escape = FALSE) %>%
      DT::formatStyle( columns = names(t(y.sub$z)), `font-size` = '12px', 'text-align' = 'center' )
    
  } else {
  cat(paste0(nrow(y.sub$z), " genes were selected. The tables is available for not more than 10 genes!"))
}

Addendum

Parameters

for ( i in 1:length(params) ) {
  cat(paste("Parameter: ", names(params)[i], "\nValue: ", paste(unlist(params[i]), collapse = ","), "\n\n", sep=""))
}
Parameter: ensembl_version
Value: 86

Parameter: exprDir
Value: /Users/jmarzec/data/Pancreatic_data_harmonization/expression/in-house/UMCCR/Combined_data

Parameter: exprFile
Value: UMCCR_PC.counts.matrix.txt.subset.txt

Parameter: annotFile
Value: UMCCR_PC_Target_cleaned.txt

Parameter: transform
Value: CPM

Parameter: norm
Value: TMM

Parameter: filter
Value: TRUE

Parameter: log
Value: TRUE

Parameter: genes
Value: KRAS,CDKN2A,TP53,SMAD4

Parameter: ensembl
Value: TRUE

Parameter: samples
Value: CCR170012_MH17T001P013,CCR180029_MH18T002P038_RNA

Parameter: results_name
Value: expression_distributions_results_CPM_TMM

Session info

devtools::session_info()
─ Session info ──────────────────────────────────────────────────────────
 setting  value                       
 version  R version 3.5.0 (2018-04-23)
 os       macOS  10.14.4              
 system   x86_64, darwin15.6.0        
 ui       X11                         
 language (EN)                        
 collate  en_AU.UTF-8                 
 ctype    en_AU.UTF-8                 
 tz       Australia/Melbourne         
 date     2019-05-13                  

─ Packages ──────────────────────────────────────────────────────────────
 package              * version   date       lib source        
 AnnotationDbi        * 1.44.0    2018-10-30 [1] Bioconductor  
 AnnotationFilter     * 1.6.0     2018-10-30 [1] Bioconductor  
 assertthat             0.2.1     2019-03-21 [1] CRAN (R 3.5.2)
 backports              1.1.4     2019-04-10 [1] CRAN (R 3.5.2)
 Biobase              * 2.42.0    2018-10-30 [1] Bioconductor  
 BiocGenerics         * 0.28.0    2018-10-30 [1] Bioconductor  
 BiocParallel           1.16.6    2019-02-10 [1] Bioconductor  
 biomaRt                2.38.0    2018-10-30 [1] Bioconductor  
 Biostrings             2.50.2    2019-01-03 [1] Bioconductor  
 bit                    1.1-14    2018-05-29 [1] CRAN (R 3.5.0)
 bit64                  0.9-7     2017-05-08 [1] CRAN (R 3.5.0)
 bitops                 1.0-6     2013-08-17 [1] CRAN (R 3.5.0)
 blob                   1.1.1     2018-03-25 [1] CRAN (R 3.5.0)
 broom                  0.5.2     2019-04-07 [1] CRAN (R 3.5.2)
 callr                  3.2.0     2019-03-15 [1] CRAN (R 3.5.0)
 cellranger             1.1.0     2016-07-27 [1] CRAN (R 3.5.0)
 cli                    1.1.0     2019-03-19 [1] CRAN (R 3.5.0)
 colorspace             1.4-1     2019-03-18 [1] CRAN (R 3.5.2)
 crayon                 1.3.4     2017-09-16 [1] CRAN (R 3.5.0)
 crosstalk              1.0.0     2016-12-21 [1] CRAN (R 3.5.0)
 curl                   3.3       2019-01-10 [1] CRAN (R 3.5.2)
 data.table             1.12.2    2019-04-07 [1] CRAN (R 3.5.2)
 DBI                    1.0.0     2018-05-02 [1] CRAN (R 3.5.0)
 DelayedArray           0.8.0     2018-10-30 [1] Bioconductor  
 desc                   1.2.0     2018-05-01 [1] CRAN (R 3.5.0)
 devtools               2.0.2     2019-04-08 [1] CRAN (R 3.5.2)
 digest                 0.6.18    2018-10-10 [1] CRAN (R 3.5.0)
 dplyr                * 0.8.0.1   2019-02-15 [1] CRAN (R 3.5.2)
 DT                     0.5       2018-11-05 [1] CRAN (R 3.5.0)
 edgeR                * 3.24.3    2019-01-02 [1] Bioconductor  
 EnsDb.Hsapiens.v86   * 2.99.0    2018-10-22 [1] Bioconductor  
 ensembldb            * 2.6.8     2019-04-03 [1] Bioconductor  
 evaluate               0.13      2019-02-12 [1] CRAN (R 3.5.0)
 forcats              * 0.4.0     2019-02-17 [1] CRAN (R 3.5.2)
 fs                     1.2.7     2019-03-19 [1] CRAN (R 3.5.2)
 generics               0.0.2     2018-11-29 [1] CRAN (R 3.5.0)
 GenomeInfoDb         * 1.18.2    2019-02-12 [1] Bioconductor  
 GenomeInfoDbData       1.2.0     2018-11-13 [1] Bioconductor  
 GenomicAlignments      1.18.1    2019-01-04 [1] Bioconductor  
 GenomicFeatures      * 1.34.8    2019-04-10 [1] Bioconductor  
 GenomicRanges        * 1.34.0    2018-10-30 [1] Bioconductor  
 getopt                 1.20.3    2019-03-22 [1] CRAN (R 3.5.2)
 ggplot2              * 3.1.1     2019-04-07 [1] CRAN (R 3.5.2)
 glue                   1.3.1     2019-03-12 [1] CRAN (R 3.5.2)
 gtable                 0.3.0     2019-03-25 [1] CRAN (R 3.5.2)
 haven                  2.1.0     2019-02-19 [1] CRAN (R 3.5.2)
 hms                    0.4.2     2018-03-10 [1] CRAN (R 3.5.0)
 htmltools              0.3.6     2017-04-28 [1] CRAN (R 3.5.0)
 htmlwidgets            1.3       2018-09-30 [1] CRAN (R 3.5.0)
 httpuv                 1.5.1     2019-04-05 [1] CRAN (R 3.5.2)
 httr                   1.4.0     2018-12-11 [1] CRAN (R 3.5.0)
 IRanges              * 2.16.0    2018-10-30 [1] Bioconductor  
 jsonlite               1.6       2018-12-07 [1] CRAN (R 3.5.0)
 knitr                  1.22      2019-03-08 [1] CRAN (R 3.5.2)
 later                  0.8.0     2019-02-11 [1] CRAN (R 3.5.2)
 lattice                0.20-38   2018-11-04 [1] CRAN (R 3.5.0)
 lazyeval               0.2.2     2019-03-15 [1] CRAN (R 3.5.2)
 limma                * 3.38.3    2018-12-02 [1] Bioconductor  
 locfit                 1.5-9.1   2013-04-20 [1] CRAN (R 3.5.0)
 lubridate              1.7.4     2018-04-11 [1] CRAN (R 3.5.0)
 magrittr               1.5       2014-11-22 [1] CRAN (R 3.5.0)
 Matrix                 1.2-17    2019-03-22 [1] CRAN (R 3.5.2)
 matrixStats            0.54.0    2018-07-23 [1] CRAN (R 3.5.0)
 memoise                1.1.0     2017-04-21 [1] CRAN (R 3.5.0)
 mime                   0.6       2018-10-05 [1] CRAN (R 3.5.0)
 modelr                 0.1.4     2019-02-18 [1] CRAN (R 3.5.2)
 munsell                0.5.0     2018-06-12 [1] CRAN (R 3.5.0)
 nlme                   3.1-139   2019-04-09 [1] CRAN (R 3.5.2)
 optparse             * 1.6.2     2019-04-02 [1] CRAN (R 3.5.2)
 pillar                 1.3.1     2018-12-15 [1] CRAN (R 3.5.0)
 pkgbuild               1.0.3     2019-03-20 [1] CRAN (R 3.5.2)
 pkgconfig              2.0.2     2018-08-16 [1] CRAN (R 3.5.0)
 pkgload                1.0.2     2018-10-29 [1] CRAN (R 3.5.0)
 plotly                 4.9.0     2019-04-10 [1] CRAN (R 3.5.0)
 plyr                   1.8.4     2016-06-08 [1] CRAN (R 3.5.0)
 preprocessCore       * 1.44.0    2018-10-30 [1] Bioconductor  
 prettyunits            1.0.2     2015-07-13 [1] CRAN (R 3.5.0)
 processx               3.3.0     2019-03-10 [1] CRAN (R 3.5.2)
 progress               1.2.0     2018-06-14 [1] CRAN (R 3.5.0)
 promises               1.0.1     2018-04-13 [1] CRAN (R 3.5.0)
 ProtGenerics           1.14.0    2018-10-30 [1] Bioconductor  
 ps                     1.3.0     2018-12-21 [1] CRAN (R 3.5.0)
 purrr                * 0.3.2     2019-03-15 [1] CRAN (R 3.5.2)
 R6                     2.4.0     2019-02-14 [1] CRAN (R 3.5.2)
 RColorBrewer         * 1.1-2     2014-12-07 [1] CRAN (R 3.5.0)
 Rcpp                   1.0.1     2019-03-17 [1] CRAN (R 3.5.2)
 RCurl                  1.95-4.12 2019-03-04 [1] CRAN (R 3.5.2)
 readr                * 1.3.1     2018-12-21 [1] CRAN (R 3.5.0)
 readxl                 1.3.1     2019-03-13 [1] CRAN (R 3.5.2)
 remotes                2.0.4     2019-04-10 [1] CRAN (R 3.5.2)
 rlang                  0.3.4     2019-04-07 [1] CRAN (R 3.5.2)
 rmarkdown              1.12      2019-03-14 [1] CRAN (R 3.5.0)
 rprojroot              1.3-2     2018-01-03 [1] CRAN (R 3.5.0)
 Rsamtools              1.34.1    2019-01-31 [1] Bioconductor  
 RSQLite                2.1.1     2018-05-06 [1] CRAN (R 3.5.0)
 rstudioapi             0.10      2019-03-19 [1] CRAN (R 3.5.0)
 rtracklayer            1.42.2    2019-03-01 [1] Bioconductor  
 rvest                  0.3.3     2019-04-11 [1] CRAN (R 3.5.2)
 S4Vectors            * 0.20.1    2018-11-09 [1] Bioconductor  
 scales               * 1.0.0     2018-08-09 [1] CRAN (R 3.5.0)
 sessioninfo            1.1.1     2018-11-05 [1] CRAN (R 3.5.0)
 shiny                  1.3.1     2019-04-12 [1] CRAN (R 3.5.2)
 stringi                1.4.3     2019-03-12 [1] CRAN (R 3.5.0)
 stringr              * 1.4.0     2019-02-10 [1] CRAN (R 3.5.2)
 SummarizedExperiment   1.12.0    2018-10-30 [1] Bioconductor  
 testthat               2.0.1     2018-10-13 [1] CRAN (R 3.5.0)
 tibble               * 2.1.1     2019-03-16 [1] CRAN (R 3.5.2)
 tidyr                * 0.8.3     2019-03-01 [1] CRAN (R 3.5.2)
 tidyselect             0.2.5     2018-10-11 [1] CRAN (R 3.5.0)
 tidyverse            * 1.2.1     2017-11-14 [1] CRAN (R 3.5.0)
 truncnorm            * 1.0-8     2018-02-27 [1] CRAN (R 3.5.0)
 usethis                1.5.0     2019-04-07 [1] CRAN (R 3.5.2)
 viridisLite            0.3.0     2018-02-01 [1] CRAN (R 3.5.0)
 withr                  2.1.2     2018-03-15 [1] CRAN (R 3.5.0)
 xfun                   0.6       2019-04-02 [1] CRAN (R 3.5.2)
 XML                    3.98-1.19 2019-03-06 [1] CRAN (R 3.5.2)
 xml2                   1.2.0     2018-01-24 [1] CRAN (R 3.5.0)
 xtable                 1.8-3     2018-08-29 [1] CRAN (R 3.5.0)
 XVector                0.22.0    2018-10-30 [1] Bioconductor  
 yaml                   2.2.0     2018-07-25 [1] CRAN (R 3.5.0)
 zlibbioc               1.28.0    2018-10-30 [1] Bioconductor  

[1] /Library/Frameworks/R.framework/Versions/3.5/Resources/library